/* Load data_bible_final and keep only the observations that find a match
   on translation in entropy_bible_constant. */
use data_bible_final, clear
merge 1:m translation using entropy_bible_constant, keep(match)

/* Snapshot of the merged data; it is restored further down, before the
   permutation test, so the figure-specific edits below are temporary. */
preserve

/* Panel labels for the translations shown in the figure. The assignments
   are applied in sequence, so a later match overrides an earlier one. */
generate str1 label = ""
replace label = "Inupiatun (esk)"   if ISO == "esk"
replace label = "Chinese (cmn)"     if ISO == "cmn"
replace label = "German (deu)"      if ISO == "deu"
replace label = "Vietnamese (vie)"  if ISO == "vie"
replace label = "Cherokee (chr)"    if language == "Cherokee"
replace label = "English (eng)"     if language == "English"
replace label = "Burmese (mya)"     if language == "Burmese"
replace label = "Tamil (tam)"       if language == "Tamil"
replace label = "Koine Greek (grc)" if trans == "ell-x-bible-koine1894"
replace label = "Quechua (qvw)"     if language == "Huaylla Wanca Quechua"
replace label = "Kuo (xuo)"         if language == "Kuo"
replace label = "Zulu (zul)"        if language == "Zulu"

/* Everything that received no label is left out of the figure. */
drop if label == ""

/* One row per label and book, holding the mean of every D* variable. */
collapse (mean) D*, by(label book)



/* Rank correlation between D_structure and D_order across books, computed
   separately for every label, followed by the median over labels.

   The ranks are centred at their within-label mean instead of a hard-coded
   3.5. The constant is the mean rank only when a label has exactly six
   untied books; for that case the result below is unchanged. */
egen D_structure_rank = rank(D_structure), by(label) field
egen D_order_rank     = rank(D_order),     by(label) field

bysort label: egen double mean_structure_rank = mean(D_structure_rank)
bysort label: egen double mean_order_rank     = mean(D_order_rank)

gen double dev_structure = D_structure_rank - mean_structure_rank
gen double dev_order     = D_order_rank     - mean_order_rank

/* Numerator (cross-product) and the two sums of squares of the
   product-moment correlation of the ranks. */
bysort label: egen double sum_cross        = total(dev_structure*dev_order)
bysort label: egen double sum_sq_structure = total(dev_structure^2)
bysort label: egen double sum_sq_order     = total(dev_order^2)
gen corr = sum_cross/sqrt(sum_sq_structure*sum_sq_order)

/* corr is constant within label: keep one value per label for the median. */
bysort label: gen id = _n
qui sum corr if id==1, d
noisily di %3.2f r(p50)

/* Two-letter marker label per book: every book starts as "Mt" and the
   book numbers listed below are then overwritten with their own code. */
generate str2 title = "Mt"
local book_ids   41 42 43 44 66
local book_abbrs Mr Lk Jn Ac Re
forvalues k = 1/5 {
	local id_k   : word `k' of `book_ids'
	local abbr_k : word `k' of `book_abbrs'
	replace title = "`abbr_k'" if book == `id_k'
}

/* One panel per label: invisible markers, so only the book codes are
   drawn at the (jittered) D_order / D_structure positions. */
twoway (scatter D_structure D_order,                                      ///
		msymbol(i) jitter(1) jitterseed(270616)                           ///
		mlab(title)  mlabsize(medlarge) mlabpos(c) ylabel(,nogrid)),      ///
	by(label, cols(3) yrescale xrescale                                   ///
		l1title("Word structure information")                             ///
		b1title("Word order information")                                 ///
		graphregion(color(white)) note(""))                               ///
	subtitle(,  color(black) bcolor(white)  size(vlarge))                 ///
	graphregion(color(white)) scheme(s2mono)                              ///
	xtitle("") ytitle("") yscale(nofextend) xscale(nofextend)             ///
	ylabel(minmax, format(%3.2f) nogrid angle(h))                         ///
	xlabel(minmax,format(%3.2f) nogrid)                                   ///
	ysize(1.3) xsize(1)
graph export fig4.png, height(10000) replace


/* Permutation test to determine the significance of the correlation, as
   described in the text. Start again from the data snapshot taken before
   the figure was built. */
restore

/* Short labels without blanks or parentheses. The assignments are applied
   in sequence, so a later match overrides an earlier one. */
generate str1 label = ""
replace label = "Inupiatun"  if ISO == "esk"
replace label = "Chinese"    if ISO == "cmn"
replace label = "German"     if ISO == "deu"
replace label = "Vietnamese" if ISO == "vie"
replace label = "Cherokee"   if language == "Cherokee"
replace label = "English"    if language == "English"
replace label = "Burmese"    if language == "Burmese"
replace label = "Tamil"      if language == "Tamil"
replace label = "KoineGreek" if trans == "ell-x-bible-koine1894"
replace label = "Quechua"    if language == "Huaylla Wanca Quechua"
replace label = "Kuo"        if language == "Kuo"
replace label = "Zulu"       if language == "Zulu"
drop if label == ""

/* One row per label and book, holding the mean of every D* variable. */
collapse (mean) D*, by(label book)

/* Rank the books within each label on both measures. */
egen D_structure_rank = rank(D_structure), by(label) field
egen D_order_rank     = rank(D_order),     by(label) field

/* Turn the long label-by-book rank table into one row per book with the
   columns structure_<label> and order_<label>, and collect the distinct
   labels in the local macro bibles for the loops that follow.

   The labels are taken once each via levelsof (not once per observation),
   and the table is built with reshape, so no numbered scratch files are
   written to the working directory. The wide table is left in memory and
   saved as temp2.dta. */
quietly {
	/* Distinct labels, sorted, without quotes. */
	levelsof label, local(bibles) clean

	keep label book D_structure_rank D_order_rank
	rename D_structure_rank structure_
	rename D_order_rank     order_

	/* label supplies the variable-name suffix, hence the string option. */
	reshape wide structure_ order_, i(book) j(label) string

	save temp2, replace
}

/* Word order: find the pair of translations with the smallest correlation
   between their order_* rank columns and compute a permutation p-value for
   it (share of all permutations of the first column whose correlation with
   the second is at least the observed one). The formatted result is stored
   in the local macro order_text. */
preserve
local type order

/* The list may contain repeated names; each translation is needed once. */
local names : list uniq bibles

local rho=1
foreach b1 of local names {
	foreach b2 of local names {
		qui corr `type'_`b1' `type'_`b2'
		if r(rho)<`rho' {
			local rho=r(rho)
			local bible1 `b1'
			local bible2 `b2'
		}
	}
}
	noisily di "Smallest: `rho' (`bible1' / `bible2')"
	putmata V1=`type'_`bible1', replace
	putmata V2=`type'_`bible2', replace

mata
/* The observed value is computed with the same function as the permuted
   values, and the comparison allows a small tolerance: passing the number
   through a local macro and comparing with an exact >= can miscount
   permutations that tie with the observed correlation. V1 and V2 hold
   ranks, so distinct attainable correlations differ by far more than the
   tolerance. */
observed = correlation((V1, V2))[2,1]
num = den = 0
info = cvpermutesetup(V1)
while ((V1=cvpermute(info)) != J(0,1,.)) {
	rho = correlation((V1,V2))[2,1]
	if (rho >= observed - 1e-8) num++
	den++
}
(num, den, num/den)
/* Hand the formatted p-value back to Stata as local macro p_value. */
st_local("p_value", strofreal(num/den, "%4.3f"))
end

local rho: di %4.3f `rho'

local `type'_text `"Rank corr. min. (word order): `rho' (p = `p_value' (`bible1' / `bible2'))"'

/* Return to the snapshot and keep it preserved for the code that follows. */
restore, preserve

/* Word structure: find the pair of translations with the smallest
   correlation between their structure_* rank columns and compute a
   permutation p-value for it (share of all permutations of the first
   column whose correlation with the second is at least the observed one).
   The formatted result is stored in the local macro structure_text. */
local type structure

/* The list may contain repeated names; each translation is needed once. */
local names : list uniq bibles

local rho=1
foreach b1 of local names {
	foreach b2 of local names {
		qui corr `type'_`b1' `type'_`b2'
		if r(rho)<`rho' {
			local rho=r(rho)
			local bible1 `b1'
			local bible2 `b2'
		}
	}
}
	noisily di "Smallest: `rho' (`bible1' / `bible2')"
	putmata V1=`type'_`bible1', replace
	putmata V2=`type'_`bible2', replace

mata
/* The observed value is computed with the same function as the permuted
   values, and the comparison allows a small tolerance: passing the number
   through a local macro and comparing with an exact >= can miscount
   permutations that tie with the observed correlation. V1 and V2 hold
   ranks, so distinct attainable correlations differ by far more than the
   tolerance. */
observed = correlation((V1, V2))[2,1]
num = den = 0
info = cvpermutesetup(V1)
while ((V1=cvpermute(info)) != J(0,1,.)) {
	rho = correlation((V1,V2))[2,1]
	if (rho >= observed - 1e-8) num++
	den++
}
(num, den, num/den)
/* Hand the formatted p-value back to Stata as local macro p_value. */
st_local("p_value", strofreal(num/den, "%4.3f"))
end

local rho: di %4.3f `rho'

local `type'_text `"Rank corr. min. (word structure): `rho' (p = `p_value' (`bible1' / `bible2'))"'

/* Releases the snapshot that is still preserved at this point. */
restore

/* average correlations */

/* order: print the permutation-test summary, then the quartiles of the
   absolute pairwise correlations between all order_* rank columns. */
di `"`order_text'"'
qui corr order*
mata
C = st_matrix("r(C)")
/* Drop the diagonal by position. Filtering on the value (< 1) would also
   discard any pair of translations whose correlation is exactly 1. */
order = abs(select(vech(C), vech(I(rows(C))) :== 0))
end

preserve
******************order****************
/* Load the vector of correlations as the only variable to summarize it. */
clear
getmata order 
qui sum order, d
di %3.2f r(p25)
di %3.2f r(p50)
di %3.2f r(p75)
restore

/* structure: print the permutation-test summary, then the quartiles of the
   absolute pairwise correlations between all structure_* rank columns. */
di `"`structure_text'"'
qui corr structure*
mata
C = st_matrix("r(C)")
/* Drop the diagonal by position. Filtering on the value (< 1) would also
   discard any pair of translations whose correlation is exactly 1. */
structure = abs(select(vech(C), vech(I(rows(C))) :== 0))
end

preserve
******************structure****************
/* Load the vector of correlations as the only variable to summarize it. */
clear
getmata structure 
qui sum structure, d
di %3.2f r(p25)
di %3.2f r(p50)
di %3.2f r(p75)
restore



/* Close the Graph window opened by the figure above and stop the do-file
   here; anything placed after exit is not executed. */
window manage close graph 
exit